library(tidyverse)
library(lubridate) # Working with dates
library(rstan)
set.seed(1)


Municipality_data <- read.csv("../Data/Covid/Municipality_data_2021_01_18.csv") %>%
  as_tibble() %>%
  filter(AnnéeDécès != "") %>%
  # Filtering what is actually population data
  filter(AnnéeDécès > 1900) # Filtering out 3954907 MILAN RENEE/     2           1923             6             7 99                              127 SAINT-EUF┬ÉENIA      …        201

source("prepare_population_data.R")

selected_years = c(2015, 2016, 2017, 2019, 2020)

Mortality_data_age_sex <- selected_years %>%
  lapply(function(y) {
    mortality_period_municipality_level_age_sex(start_date = ymd(paste(y, "/03/15", sep = "")), end_date = ymd(paste(y, "/07/26", sep = ""))) %>%
      mutate(year = y)
  }) %>%
  bind_rows() %>% 
  mutate(age_class = age_class)

Mortality_data_age_sex_joined <- population_age_structure %>%
  filter(year %in% selected_years) %>%
  rename(Municipality_code = Code) %>%
  left_join(Mortality_data_age_sex) %>%
  mutate(NbDeath = ifelse(test = is.na(NbDeath), yes = 0, no = NbDeath))

Mortality_data_age_sex_joined_corrected = Mortality_data_age_sex_joined %>% 
  mutate(population = mapply(function(x,y) max(x,y), population, NbDeath))

age_sex_specific_IFR = read_delim("../Data/Covid/age_sex_specific_mortality_rates.csv", " ") %>% 
  as_tibble() %>% 
  select(age_group, Male, Female) %>% 
  mutate(age_class = age_group_name_age_group_converter(age_group_name = age_group)) %>% 
  rename(male = Male, female = Female) %>% 
  gather(sex, rho_as, male:female) %>% 
  mutate(rho_as = rho_as/100) #Converting percentages to real fractions

selected_cities = Mortality_data_age_sex_joined %>% 
  filter(year == 2020) %>% 
  filter(population_total >= 710) %>% # Filtering which should match the electoral data filtering
  .$Municipality_code

full_data = left_join(Mortality_data_age_sex_joined_corrected, age_sex_specific_IFR) %>% 
  filter(Municipality_code %in% selected_cities) %>%
  mutate(covid = ifelse(year == 2020, yes = 1, no = 0)) %>% 
  filter(population>0) %>% 
  filter(age_class > 2) %>% 
  filter(age_class < 17) %>% 
  mutate(age_class = as.numeric(as.factor(age_class))) %>% 
  mutate(municipality_idx = Municipality_code %>% as.factor() %>% as.numeric())

create_municipality_code_converter = function(Municipality_idx, Municipality_code){
  dict = Municipality_code %>% 
    unique %>% 
    setNames(Municipality_idx %>% unique()) 
  conversion_function = function(x){
    sapply(x, function(xi) dict[xi])
  }
  return(conversion_function)
}

Municipality_code_converter = create_municipality_code_converter(full_data$municipality_idx, full_data$Municipality_code)

stan_data = list(N = full_data %>% nrow(),
                 Nmunicipalities = full_data$municipality_idx %>% unique() %>% length(),
                 Ymtas = full_data$NbDeath,
                 Nmtas = full_data$population,
                 municipality = full_data$municipality_idx,
                 covid = full_data$covid,
                 Nage_class = full_data$age_class %>% unique() %>% length(),
                 age_class = full_data$age_class,
                 age_class_scaled = (full_data$age_class-mean(full_data$age_class))/2*sd(full_data$age_class),
                 sex = ifelse(test = full_data$sex == "male", yes = 1, no = 0),
                 rho_as = full_data$rho_as,
                 Municipality_code_converter = Municipality_code_converter)

saveRDS(stan_data, file = "stan_data_for_fit_age_sex_2021_01_18_4weeks_second_round.rds")
